# -*- coding: utf-8 -*-
"""
Code to replicate the simulations in The Consequences of Using a Top 5/RCV System in Nevada by Gelman, Pritsos and Reilly
Created: March 5, 2024
Most recent update: September 10, 2024

"""

import pyrankvote
from pyrankvote import Candidate, Ballot
import os
import pandas as pd
import numpy as np
import random
import time

# Load the two survey samples; both files hold the text-coded answers.
dy_text = pd.read_csv('/replication/dynata_text.csv')  # Dynata sample
cv_text = pd.read_csv('/replication/convenience_text.csv')  # convenience sample

# Stack the samples into a single dataframe, keep only the rows that are not
# flagged for omission (8 observations, per the original author's note), and
# renumber the rows 0..n-1 without keeping the old index as a column.
frames = [dy_text, cv_text]
data_text = pd.concat(frames)
not_omitted = data_text['omit'] != 1
data_text = data_text[not_omitted].reset_index().drop(columns=['index'])

# Extract the name from the primary data: keep only the text that follows the
# last '>' in each answer.
data_text['sen24_primary'] = data_text['sen24_primary'].str.split('>').str[-1]

#Change first line
# Survey item -> candidate column holding that candidate's rank.
rank_columns = {'Q52_1': "harrison", 'Q52_2': 'marchant', 'Q52_3': 'rosen', 'Q52_4': 'sandoval', 'Q52_5': 'segerblom'}
# Text answer -> numeric rank. Answers that are missing or not listed here
# become NaN, because Series.map leaves unmatched values as NaN.
rank_codes = {'1st choice': 1, '2nd choice': 2, '3rd choice': 3, '4th choice': 4, '5th choice': 5}

data_text = data_text.rename(columns=rank_columns)
for candidate_col in rank_columns.values():
    data_text[candidate_col] = data_text[candidate_col].map(rank_codes)

#Create partisanship variable - this preserves independents as non-partisans.
# One 0/1 indicator column per answer to Q1; the dict order also fixes the
# party_id codes assigned further down (0, 1, 2, 3).
party_flags = {
    'democrat': 'Democrat',
    'republican': 'Republican',
    'independent': 'Independent',
    'third_party': 'Something else',
}
for flag_col, q1_answer in party_flags.items():
    data_text[flag_col] = np.where(data_text['Q1'] == q1_answer, 1, 0)

# Group sizes. These are bare expressions: the values are computed but not
# stored or printed when the file is run as a script.
for flag_col in party_flags:
    data_text[flag_col].sum()

#Single category for party ID
# Respondents matching none of the four indicators keep a missing party_id.
for party_code, flag_col in enumerate(party_flags):
    data_text.loc[data_text[flag_col] == 1, 'party_id'] = party_code

#Create the number of hypothetical partisan splits
# Each split is a list [dem, rep, ind] of percentage points. It is produced by
# drawing 95 labels uniformly at random from party_split and counting them, so
# the three numbers always sum to 95 (the remaining 5% is the third-party share
# used by the simulations). Draws outside the plausibility bounds below are
# thrown away and redrawn.
#
# NOTE: duplicate splits are kept on purpose. The original code had a branch
# `elif new_split==all_splits` that compared one [dem, rep, ind] list with the
# whole list of lists, so it could never be true and never filtered anything.
# It is removed rather than "fixed": only 266 distinct splits satisfy the
# bounds, so requiring 10,000 unique ones would loop forever.
#
# NOTE(review): `random` is not seeded, so the splits differ between runs.
all_splits=[]
party_split = ['dem', 'rep', 'ind']
n_splits = 10000 #How many partisan splits you want
n_draws = 95 #95% voters assumed in splits (5% as third party)
case=0
while case<n_splits:
    split = [random.choice(party_split) for _ in range(n_draws)]
    dem = split.count('dem')
    rep = split.count('rep')
    ind = split.count('ind')
    # Accept only splits with 25-45% Democrats, 25-45% Republicans and
    # 15-30% independents; anything else is redrawn.
    if not (25<=dem<=45 and 25<=rep<=45 and 15<=ind<=30):
        continue
    new_split = [dem, rep, ind]
    all_splits.append(new_split)
    case=case+1
    print("case", case)
        
#Primary winner
# For every partisan split, draw 500 weighted resamples of 316 respondents and
# record the five most-chosen primary candidates in each resample.
#
# primary_results ends up with one row per (split, resample, finishing place):
#   index          finishing place, 1 = most votes
#   sen24_primary  candidate name
#   counts         votes received in the resample
#   sim            running number of the split, starting at 1
#   dem, rep, ind  the partisan split the resample was drawn under
#
# Fixes relative to the earlier version of this loop:
#   * the 'ind' column stored `rep` instead of `ind`;
#   * the `if count<500` / `else` branches were identical and are merged;
#   * the sampling weights only depend on the split, so they are set once per
#     split instead of once per resample;
#   * frames are collected in lists and concatenated once, instead of growing
#     a dataframe with pd.concat on every iteration.
primary_results = pd.DataFrame()
third=5
sim=1
splits=0
split_frames = []
for i in all_splits:
    start=time.time()
    splits=splits+1
    dem=i[0]
    rep=i[1]
    ind=i[2]
    print(dem, rep, ind, splits)

    # Per-respondent sampling weight, set from the respondent's party. Rows
    # whose party matches none of the four indicators are left untouched.
    data_text.loc[data_text['democrat']==1, 'prop']=dem/100
    data_text.loc[data_text['republican']==1, 'prop']=rep/100
    data_text.loc[data_text['independent']==1, 'prop']=ind/100
    data_text.loc[data_text['third_party']==1, 'prop']=third/100

    outcomes = []
    for count in range(1, 501):
        # random_state=count: the same 500 seeds are reused for every split.
        data2 = data_text.sample(n=316, random_state=count, weights=data_text['prop'])
        sen24_primary_counts = data2.groupby(['sen24_primary']).size().reset_index(name='counts') #Change
        outcome = sen24_primary_counts.nlargest(5, ['counts']) #Change
        outcome = outcome.sort_values(by=['counts'], ascending=False)
        # Number the finishers 1..k, then move that numbering into an
        # 'index' column so it survives concatenation.
        outcome.index = np.arange(1, len(outcome)+1)
        outcome = outcome.reset_index()
        outcome['sim']=sim
        outcome['dem']=dem
        outcome['rep']=rep
        outcome['ind']=ind
        outcomes.append(outcome)

    split_frames.append(pd.concat(outcomes))
    sim = sim+1
    end = time.time()
    print(end-start)

# pd.concat raises on an empty list, so keep the empty frame when there were
# no splits to simulate.
if split_frames:
    primary_results = pd.concat(split_frames)


# Number of simulated primaries per Democratic share. Rows are counted and
# divided by 5, which assumes every simulated primary contributed exactly five
# rows to primary_results.
advance2 = (
    primary_results.groupby(['dem'])['sim']
    .count()
    .reset_index()
    .rename(columns={'sim':"total",})
)
advance2['total'] = advance2['total'] / 5

# How often each candidate appears in primary_results, per Democratic share,
# expressed as a fraction of the simulated primaries at that share.
advance3 = (
    primary_results.groupby(['sen24_primary', 'dem'])['index']
    .count()
    .reset_index()
)
primary_winners = advance3.merge(advance2, how='inner', on='dem')
primary_winners['win_perc'] = primary_winners['index'] / primary_winners['total']

# Every relative output path from here on resolves inside /replication/.
os.chdir('/replication/')
primary_winners.to_csv('sen24_primary.csv', index=False)

#This is total counts of winning
# Per-candidate row counts across all simulations; the renamed 'total' column
# is scaled by the hard-coded 5,000,000 (presumably 10,000 splits x 500
# resamples - confirm if either setting changes).
advance0 = (
    primary_results.groupby(['sen24_primary'])
    .count()
    .reset_index()
    .rename(columns={'sim':"total",})
)
advance0['total'] = advance0['total'] / 5000000
advance0.to_csv('sen24_primary_agg.csv', index=False)

#Run the ranked choice outcomes
# For every partisan split, draw 500 weighted resamples of 316 respondents,
# run an instant-runoff election on each resample and count how many of the
# 500 elections each candidate wins. `results` gets one row per split.
#
# Fixes relative to the earlier version of this loop:
#   * Overvotes (one voter giving the same rank to several candidates) were
#     marked by writing '$OVERVOTE' into the rank column, but the ballot
#     parser tested the candidate-name column, so the tied candidates were
#     never skipped. They are now left off the ballot, as the marker intended.
#     Writing a string into the integer rank column is also rejected by recent
#     pandas; a boolean mask is used instead.
#   * The `count==500` / `else` branches duplicated the whole election and are
#     merged; the per-split tally is taken after the inner loop.
#   * `sim` was incremented before being recorded, so labels started at 2.
#     They now start at 1, matching the primary simulation.
#   * The winner's name is read from the Candidate object instead of being
#     sliced out of repr(winners).
rcv_candidate_names = ['harrison', 'marchant', 'rosen', 'sandoval', 'segerblom'] #CHANGE


def _build_ballots(sample, candidate_by_name):
    """Convert the rank columns of one resampled electorate into ballots.

    sample: dataframe with one rank column per name in rcv_candidate_names;
        NaN means the respondent did not rank that candidate.
    candidate_by_name: dict mapping candidate name -> Candidate object.

    Returns a list of Ballot objects, one per respondent who ranked at least
    one candidate, in ascending order of the sample's row labels. Candidates
    appear on a ballot in ascending rank order; candidates sharing a rank with
    another candidate on the same ballot are omitted.
    """
    # Long format: one row per (respondent, candidate) with the rank as value.
    long_ranks = pd.melt(sample[rcv_candidate_names].reset_index(), id_vars='index', value_vars=rcv_candidate_names)
    long_ranks = long_ranks.sort_values(by=['index', 'value'])
    long_ranks = long_ranks.dropna(subset=['value'])
    is_overvote = long_ranks.duplicated(subset=['index', 'value'], keep=False)

    ballots = []
    ranked_candidates = []
    last_ballot_id = None
    for ballot_id, overvoted, candidate_name in zip(long_ranks['index'], is_overvote, long_ranks['variable']):
        # Rows are sorted by respondent, so a change of id closes a ballot.
        if last_ballot_id is not None and ballot_id != last_ballot_id:
            ballots.append(Ballot(ranked_candidates))
            ranked_candidates = []
        last_ballot_id = ballot_id
        if overvoted:
            continue
        ranked_candidates.append(candidate_by_name[candidate_name])
    # Close the last ballot (as before, this also appends one blank ballot
    # when the sample contains no rankings at all).
    ballots.append(Ballot(ranked_candidates))
    return ballots


candidate_by_name = {name: Candidate(name) for name in rcv_candidate_names}
candidates = [candidate_by_name[name] for name in rcv_candidate_names]

champs=[]
results = pd.DataFrame()
result_rows = []
count=0
third=5
sim=1
splits=0
for i in all_splits:
    dem=i[0]
    rep=i[1]
    ind=i[2]
    splits=splits+1
    print(dem, rep, ind, splits)

    # Sampling weights depend only on the split, so set them once per split.
    data_text.loc[data_text['democrat']==1, 'prop']=dem/100
    data_text.loc[data_text['republican']==1, 'prop']=rep/100
    data_text.loc[data_text['independent']==1, 'prop']=ind/100
    data_text.loc[data_text['third_party']==1, 'prop']=third/100

    champs=[]
    for count in range(1, 501):
        # random_state=count: the same 500 seeds are reused for every split.
        data2 = data_text.sample(n=316, random_state=count, weights=data_text['prop'])
        ballots = _build_ballots(data2, candidate_by_name)
        election_result = pyrankvote.instant_runoff_voting(candidates, ballots, pick_random_if_blank=False)
        winners = election_result.get_winners()
        champs.append(winners[0].name)

    # One summary row per split: the split itself plus wins per candidate.
    row = {'sim':sim, 'dem':dem,'rep':rep, 'ind':ind, 'third':third}
    for name in rcv_candidate_names:
        row['num_' + name] = champs.count(name)
    result_rows.append(row)
    sim=sim+1

if result_rows:
    results = pd.DataFrame(result_rows)

# Aggregate the ranked-choice results by Democratic share and write them out.
# final_winners has one row per 'dem' value with:
#   sim              number of splits simulated at that share
#   num_<candidate>  elections won by the candidate across those splits
#   <candidate>_perc num_<candidate> divided by (sim * 500)
results= results.sort_values(by=['dem', 'rep'])
results2 = results.groupby(['dem'])['sim'].count().to_frame()

# Select the win columns with a list: pandas 2.x raises ValueError when a
# groupby is subset with a tuple of several column names.
general_candidates = ['harrison', 'marchant', 'rosen', 'sandoval', 'segerblom']
win_columns = ['num_' + name for name in general_candidates]
results_wins = results.groupby(['dem'])[win_columns].sum()

final_winners = pd.merge(results2, results_wins, how='inner', on='dem')
# 500 is the hard-coded number of elections behind each row of `results`.
for name in general_candidates:
    final_winners[name + '_perc'] = final_winners['num_' + name]/(final_winners['sim']*500)
final_winners = final_winners.reset_index()

final_winners.to_csv('sen24_general.csv', index=False)
